home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
a_utils
/
yacc
/
flexyacc
/
aflex.lha
/
aflex
/
src
/
dfaB.a
< prev
next >
Wrap
Text File
|
1991-05-16
|
30KB
|
942 lines
-- Copyright (c) 1990 Regents of the University of California.
-- All rights reserved.
--
-- This software was developed by John Self of the Arcadia project
-- at the University of California, Irvine.
--
-- Redistribution and use in source and binary forms are permitted
-- provided that the above copyright notice and this paragraph are
-- duplicated in all such forms and that any documentation,
-- advertising materials, and other materials related to such
-- distribution and use acknowledge that the software was developed
-- by the University of California, Irvine. The name of the
-- University may not be used to endorse or promote products derived
-- from this software without specific prior written permission.
-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
-- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
-- WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-- TITLE DFA construction routines
-- AUTHOR: John Self (UCI)
-- DESCRIPTION converts non-deterministic finite automatons to finite ones.
-- $Header: /co/ua/self/arcadia/aflex/ada/src/RCS/dfaB.a,v 1.18 90/01/12 15:19:48 self Exp Locker: self $
with DFA, INT_IO, MISC_DEFS, TEXT_IO, MISC, TBLCMP, CCL, EXTERNAL_FILE_MANAGER;
with ECS, NFA, TSTRING, GEN, SKELETON_MANAGER; use MISC_DEFS,
EXTERNAL_FILE_MANAGER;
package body DFA is
use TSTRING;
-- check_for_backtracking - check a DFA state for backtracking
--
-- ds is the number of the state to check and state[) is its out-transitions,
-- indexed by equivalence class, and state_rules[) is the set of rules
-- associated with this state
DID_STK_INIT : BOOLEAN := FALSE;
STK : INT_PTR;
procedure CHECK_FOR_BACKTRACKING(DS : in INTEGER;
STATE : in UNBOUNDED_INT_ARRAY) is
use MISC_DEFS;
begin
if (DFAACC(DS).DFAACC_STATE = 0) then
-- state is non-accepting
NUM_BACKTRACKING := NUM_BACKTRACKING + 1;
if (BACKTRACK_REPORT) then
TEXT_IO.PUT(BACKTRACK_FILE, "State #");
INT_IO.PUT(BACKTRACK_FILE, DS, 1);
TEXT_IO.PUT(BACKTRACK_FILE, "is non-accepting -");
TEXT_IO.NEW_LINE(BACKTRACK_FILE);
-- identify the state
DUMP_ASSOCIATED_RULES(BACKTRACK_FILE, DS);
-- now identify it further using the out- and jam-transitions
DUMP_TRANSITIONS(BACKTRACK_FILE, STATE);
TEXT_IO.NEW_LINE(BACKTRACK_FILE);
end if;
end if;
end CHECK_FOR_BACKTRACKING;
-- check_trailing_context - check to see if NFA state set constitutes
-- "dangerous" trailing context
--
-- NOTES
-- Trailing context is "dangerous" if both the head and the trailing
-- part are of variable size \and/ there's a DFA state which contains
-- both an accepting state for the head part of the rule and NFA states
-- which occur after the beginning of the trailing context.
-- When such a rule is matched, it's impossible to tell if having been
-- in the DFA state indicates the beginning of the trailing context
-- or further-along scanning of the pattern. In these cases, a warning
-- message is issued.
--
-- nfa_states[1 .. num_states) is the list of NFA states in the DFA.
-- accset[1 .. nacc) is the list of accepting numbers for the DFA state.
procedure CHECK_TRAILING_CONTEXT(NFA_STATES : in INT_PTR;
NUM_STATES : in INTEGER;
ACCSET : in INT_PTR;
NACC : in INTEGER) is
NS, AR : INTEGER;
STATE_VAR, TYPE_VAR : STATE_ENUM;
use MISC_DEFS, MISC, TEXT_IO;
begin
for I in 1 .. NUM_STATES loop
NS := NFA_STATES(I);
TYPE_VAR := STATE_TYPE(NS);
AR := ASSOC_RULE(NS);
if ((TYPE_VAR = STATE_NORMAL) or (RULE_TYPE(AR) /= RULE_VARIABLE)) then
null;
-- do nothing
else
if (TYPE_VAR = STATE_TRAILING_CONTEXT) then
-- potential trouble. Scan set of accepting numbers for
-- the one marking the end of the "head". We assume that
-- this looping will be fairly cheap since it's rare that
-- an accepting number set is large.
for J in 1 .. NACC loop
if (CHECK_YY_TRAILING_HEAD_MASK(ACCSET(J)) /= 0) then
TEXT_IO.PUT(STANDARD_ERROR,
"aflex: Dangerous trailing context in rule at line ");
INT_IO.PUT(STANDARD_ERROR, RULE_LINENUM(AR), 1);
TEXT_IO.NEW_LINE(STANDARD_ERROR);
return;
end if;
end loop;
end if;
end if;
end loop;
end CHECK_TRAILING_CONTEXT;
-- dump_associated_rules - list the rules associated with a DFA state
--
-- goes through the set of NFA states associated with the DFA and
-- extracts the first MAX_ASSOC_RULES unique rules, sorts them,
-- and writes a report to the given file
procedure DUMP_ASSOCIATED_RULES(F : in FILE_TYPE;
DS : in INTEGER) is
J : INTEGER;
NUM_ASSOCIATED_RULES : INTEGER := 0;
RULE_SET : INT_PTR;
SIZE, RULE_NUM : INTEGER;
begin
RULE_SET := new UNBOUNDED_INT_ARRAY(0 .. MAX_ASSOC_RULES + 1);
SIZE := DFASIZ(DS);
for I in 1 .. SIZE loop
RULE_NUM := RULE_LINENUM(ASSOC_RULE(DSS(DS)(I)));
J := 1;
while (J <= NUM_ASSOCIATED_RULES) loop
if (RULE_NUM = RULE_SET(J)) then
exit;
end if;
J := J + 1;
end loop;
if (J > NUM_ASSOCIATED_RULES) then
--new rule
if (NUM_ASSOCIATED_RULES < MAX_ASSOC_RULES) then
NUM_ASSOCIATED_RULES := NUM_ASSOCIATED_RULES + 1;
RULE_SET(NUM_ASSOCIATED_RULES) := RULE_NUM;
end if;
end if;
end loop;
MISC.BUBBLE(RULE_SET, NUM_ASSOCIATED_RULES);
TEXT_IO.PUT(F, " associated rules:");
for I in 1 .. NUM_ASSOCIATED_RULES loop
if (I mod 8 = 1) then
TEXT_IO.NEW_LINE(F);
end if;
TEXT_IO.PUT(F, ASCII.HT);
INT_IO.PUT(F, RULE_SET(I), 1);
end loop;
TEXT_IO.NEW_LINE(F);
exception
when STORAGE_ERROR =>
MISC.AFLEXFATAL("dynamic memory failure in dump_associated_rules()");
end DUMP_ASSOCIATED_RULES;
-- dump_transitions - list the transitions associated with a DFA state
--
-- goes through the set of out-transitions and lists them in human-readable
-- form (i.e., not as equivalence classes); also lists jam transitions
-- (i.e., all those which are not out-transitions, plus EOF). The dump
-- is done to the given file.
procedure DUMP_TRANSITIONS(F : in FILE_TYPE;
STATE : in UNBOUNDED_INT_ARRAY) is
EC : INTEGER;
OUT_CHAR_SET : C_SIZE_BOOL_ARRAY;
begin
for I in 1 .. CSIZE loop
EC := ECGROUP(I);
if (EC < 0) then
EC := -EC;
end if;
OUT_CHAR_SET(I) := (STATE(EC) /= 0);
end loop;
TEXT_IO.PUT(F, " out-transitions: ");
CCL.LIST_CHARACTER_SET(F, OUT_CHAR_SET);
-- now invert the members of the set to get the jam transitions
for I in 1 .. CSIZE loop
OUT_CHAR_SET(I) := not OUT_CHAR_SET(I);
end loop;
TEXT_IO.NEW_LINE(F);
TEXT_IO.PUT(F, "jam-transitions: EOF ");
CCL.LIST_CHARACTER_SET(F, OUT_CHAR_SET);
TEXT_IO.NEW_LINE(F);
end DUMP_TRANSITIONS;
-- epsclosure - construct the epsilon closure of a set of ndfa states
--
-- NOTES
-- the epsilon closure is the set of all states reachable by an arbitrary
-- number of epsilon transitions which themselves do not have epsilon
-- transitions going out, unioned with the set of states which have non-null
-- accepting numbers. t is an array of size numstates of nfa state numbers.
-- Upon return, t holds the epsilon closure and numstates is updated. accset
-- holds a list of the accepting numbers, and the size of accset is given
-- by nacc. t may be subjected to reallocation if it is not large enough
-- to hold the epsilon closure.
--
-- hashval is the hash value for the dfa corresponding to the state set
procedure EPSCLOSURE(T : in out INT_PTR;
NS_ADDR : in out INTEGER;
ACCSET : in out INT_PTR;
NACC_ADDR, HV_ADDR : out INTEGER;
RESULT : out INT_PTR) is
NS, TSP : INTEGER;
NUMSTATES, NACC, HASHVAL, TRANSSYM, NFACCNUM : INTEGER;
STKEND : INTEGER;
STKPOS : INTEGER;
procedure MARK_STATE(STATE : in INTEGER) is
begin
TRANS1(STATE) := TRANS1(STATE) - MARKER_DIFFERENCE;
end MARK_STATE;
pragma INLINE(MARK_STATE);
function IS_MARKED(STATE : in INTEGER) return BOOLEAN is
begin
return TRANS1(STATE) < 0;
end IS_MARKED;
pragma INLINE(IS_MARKED);
procedure UNMARK_STATE(STATE : in INTEGER) is
begin
TRANS1(STATE) := TRANS1(STATE) + MARKER_DIFFERENCE;
end UNMARK_STATE;
pragma INLINE(UNMARK_STATE);
procedure CHECK_ACCEPT(STATE : in INTEGER) is
begin
NFACCNUM := ACCPTNUM(STATE);
if (NFACCNUM /= NIL) then
NACC := NACC + 1;
ACCSET(NACC) := NFACCNUM;
end if;
end CHECK_ACCEPT;
pragma INLINE(CHECK_ACCEPT);
procedure DO_REALLOCATION is
begin
CURRENT_MAX_DFA_SIZE := CURRENT_MAX_DFA_SIZE + MAX_DFA_SIZE_INCREMENT;
NUM_REALLOCS := NUM_REALLOCS + 1;
REALLOCATE_INTEGER_ARRAY(T, CURRENT_MAX_DFA_SIZE);
REALLOCATE_INTEGER_ARRAY(STK, CURRENT_MAX_DFA_SIZE);
end DO_REALLOCATION;
pragma INLINE(DO_REALLOCATION);
procedure PUT_ON_STACK(STATE : in INTEGER) is
begin
STKEND := STKEND + 1;
if (STKEND >= CURRENT_MAX_DFA_SIZE) then
DO_REALLOCATION;
end if;
STK(STKEND) := STATE;
MARK_STATE(STATE);
end PUT_ON_STACK;
pragma INLINE(PUT_ON_STACK);
procedure ADD_STATE(STATE : in INTEGER) is
begin
NUMSTATES := NUMSTATES + 1;
if (NUMSTATES >= CURRENT_MAX_DFA_SIZE) then
DO_REALLOCATION;
end if;
T(NUMSTATES) := STATE;
HASHVAL := HASHVAL + STATE;
end ADD_STATE;
pragma INLINE(ADD_STATE);
procedure STACK_STATE(STATE : in INTEGER) is
begin
PUT_ON_STACK(STATE);
CHECK_ACCEPT(STATE);
if ((NFACCNUM /= NIL) or (TRANSCHAR(STATE) /= SYM_EPSILON)) then
ADD_STATE(STATE);
end if;
end STACK_STATE;
pragma INLINE(STACK_STATE);
begin
NUMSTATES := NS_ADDR;
if (not DID_STK_INIT) then
STK := ALLOCATE_INTEGER_ARRAY(CURRENT_MAX_DFA_SIZE);
DID_STK_INIT := TRUE;
end if;
NACC := 0;
STKEND := 0;
HASHVAL := 0;
for NSTATE in 1 .. NUMSTATES loop
NS := T(NSTATE);
-- the state could be marked if we've already pushed it onto
-- the stack
if (not IS_MARKED(NS)) then
PUT_ON_STACK(NS);
null;
end if;
CHECK_ACCEPT(NS);
HASHVAL := HASHVAL + NS;
end loop;
STKPOS := 1;
while (STKPOS <= STKEND) loop
NS := STK(STKPOS);
TRANSSYM := TRANSCHAR(NS);
if (TRANSSYM = SYM_EPSILON) then
TSP := TRANS1(NS) + MARKER_DIFFERENCE;
if (TSP /= NO_TRANSITION) then
if (not IS_MARKED(TSP)) then
STACK_STATE(TSP);
end if;
TSP := TRANS2(NS);
if (TSP /= NO_TRANSITION) then
if (not IS_MARKED(TSP)) then
STACK_STATE(TSP);
end if;
end if;
end if;
end if;
STKPOS := STKPOS + 1;
end loop;
-- clear out "visit" markers
for CHK_STKPOS in 1 .. STKEND loop
if (IS_MARKED(STK(CHK_STKPOS))) then
UNMARK_STATE(STK(CHK_STKPOS));
else
MISC.AFLEXFATAL("consistency check failed in epsclosure()");
end if;
end loop;
NS_ADDR := NUMSTATES;
HV_ADDR := HASHVAL;
NACC_ADDR := NACC;
RESULT := T;
end EPSCLOSURE;
-- increase_max_dfas - increase the maximum number of DFAs
procedure INCREASE_MAX_DFAS is
begin
CURRENT_MAX_DFAS := CURRENT_MAX_DFAS + MAX_DFAS_INCREMENT;
NUM_REALLOCS := NUM_REALLOCS + 1;
REALLOCATE_INTEGER_ARRAY(BASE, CURRENT_MAX_DFAS);
REALLOCATE_INTEGER_ARRAY(DEF, CURRENT_MAX_DFAS);
REALLOCATE_INTEGER_ARRAY(DFASIZ, CURRENT_MAX_DFAS);
REALLOCATE_INTEGER_ARRAY(ACCSIZ, CURRENT_MAX_DFAS);
REALLOCATE_INTEGER_ARRAY(DHASH, CURRENT_MAX_DFAS);
REALLOCATE_INT_PTR_ARRAY(DSS, CURRENT_MAX_DFAS);
REALLOCATE_DFAACC_UNION(DFAACC, CURRENT_MAX_DFAS);
end INCREASE_MAX_DFAS;
-- ntod - convert an ndfa to a dfa
--
-- creates the dfa corresponding to the ndfa we've constructed. the
-- dfa starts out in state #1.
procedure NTOD is
ACCSET : INT_PTR;
DS, NACC, NEWDS : INTEGER;
DUPLIST, TARGFREQ, TARGSTATE, STATE : C_SIZE_ARRAY;
SYMLIST : C_SIZE_BOOL_ARRAY;
HASHVAL, NUMSTATES, DSIZE : INTEGER;
NSET, DSET : INT_PTR;
TARGPTR, TOTALTRANS, I, J, COMSTATE, COMFREQ, TARG : INTEGER;
NUM_START_STATES, TODO_HEAD, TODO_NEXT : INTEGER;
SNSRESULT : BOOLEAN;
FULL_TABLE_TEMP_FILE : FILE_TYPE;
BUF : VSTRING;
NUM_NXT_STATES : INTEGER;
use TEXT_IO;
-- this is so find_table_space(...) will know where to start looking in
-- chk/nxt for unused records for space to put in the state
begin
ACCSET := ALLOCATE_INTEGER_ARRAY(NUM_RULES + 1);
NSET := ALLOCATE_INTEGER_ARRAY(CURRENT_MAX_DFA_SIZE);
-- the "todo" queue is represented by the head, which is the DFA
-- state currently being processed, and the "next", which is the
-- next DFA state number available (not in use). We depend on the
-- fact that snstods() returns DFA's \in increasing order/, and thus
-- need only know the bounds of the dfas to be processed.
TODO_HEAD := 0;
TODO_NEXT := 0;
for CNT in 0 .. CSIZE loop
DUPLIST(CNT) := NIL;
SYMLIST(CNT) := FALSE;
end loop;
for CNT in 0 .. NUM_RULES loop
ACCSET(CNT) := NIL;
end loop;
if (TRACE) then
NFA.DUMPNFA(SCSET(1));
TEXT_IO.NEW_LINE(STANDARD_ERROR);
TEXT_IO.NEW_LINE(STANDARD_ERROR);
TEXT_IO.PUT(STANDARD_ERROR, "DFA Dump:");
TEXT_IO.NEW_LINE(STANDARD_ERROR);
TEXT_IO.NEW_LINE(STANDARD_ERROR);
end if;
TBLCMP.INITTBL;
if (FULLTBL) then
GEN.DO_SECT3_OUT;
-- output user code up to ##
SKELETON_MANAGER.SKELOUT;
-- declare it "short" because it's a real long-shot that that
-- won't be large enough
begin -- make a temporary file to write yy_nxt array into
CREATE(FULL_TABLE_TEMP_FILE, OUT_FILE);
exception
when USE_ERROR | NAME_ERROR =>
MISC.AFLEXFATAL("can't create temporary file");
end;
NUM_NXT_STATES := 1;
TEXT_IO.PUT(FULL_TABLE_TEMP_FILE, "( ");
-- generate 0 entries for state #0
for CNT in 0 .. NUMECS loop
MISC.MK2DATA(FULL_TABLE_TEMP_FILE, 0);
end loop;
TEXT_IO.PUT(FULL_TABLE_TEMP_FILE, " )");
-- force extra blank line next dataflush()
DATALINE := NUMDATALINES;
end if;
-- create the first states
NUM_START_STATES := LASTSC*2;
for CNT in 1 .. NUM_START_STATES loop
NUMSTATES := 1;
-- for each start condition, make one state for the case when
-- we're at the beginning of the line (the '%' operator) and
-- one for the case when we're not
if (CNT mod 2 = 1) then
NSET(NUMSTATES) := SCSET((CNT/2) + 1);
else
NSET(NUMSTATES) := NFA.MKBRANCH(SCBOL(CNT/2), SCSET(CNT/2));
end if;
DFA.EPSCLOSURE(NSET, NUMSTATES, ACCSET, NACC, HASHVAL, NSET);
SNSTODS(NSET, NUMSTATES, ACCSET, NACC, HASHVAL, DS, SNSRESULT);
if (SNSRESULT) then
NUMAS := NUMAS + NACC;
TOTNST := TOTNST + NUMSTATES;
TODO_NEXT := TODO_NEXT + 1;
if (VARIABLE_TRAILING_CONTEXT_RULES and (NACC > 0)) then
CHECK_TRAILING_CONTEXT(NSET, NUMSTATES, ACCSET, NACC);
end if;
end if;
end loop;
SNSTODS(NSET, 0, ACCSET, 0, 0, END_OF_BUFFER_STATE, SNSRESULT);
if (not SNSRESULT) then
MISC.AFLEXFATAL("could not create unique end-of-buffer state");
end if;
NUMAS := NUMAS + 1;
NUM_START_STATES := NUM_START_STATES + 1;
TODO_NEXT := TODO_NEXT + 1;
while (TODO_HEAD < TODO_NEXT) loop
NUM_NXT_STATES := NUM_NXT_STATES + 1;
TARGPTR := 0;
TOTALTRANS := 0;
for STATE_CNT in 1 .. NUMECS loop
STATE(STATE_CNT) := 0;
end loop;
TODO_HEAD := TODO_HEAD + 1;
DS := TODO_HEAD;
DSET := DSS(DS);
DSIZE := DFASIZ(DS);
if (TRACE) then
TEXT_IO.PUT(STANDARD_ERROR, "state # ");
INT_IO.PUT(STANDARD_ERROR, DS, 1);
TEXT_IO.PUT_LINE(STANDARD_ERROR, ":");
end if;
SYMPARTITION(DSET, DSIZE, SYMLIST, DUPLIST);
for SYM in 1 .. NUMECS loop
if (SYMLIST(SYM)) then
SYMLIST(SYM) := FALSE;
if (DUPLIST(SYM) = NIL) then
-- symbol has unique out-transitions
NUMSTATES := SYMFOLLOWSET(DSET, DSIZE, SYM, NSET);
DFA.EPSCLOSURE(NSET, NUMSTATES, ACCSET, NACC, HASHVAL, NSET);
SNSTODS(NSET, NUMSTATES, ACCSET, NACC, HASHVAL, NEWDS, SNSRESULT);
if (SNSRESULT) then
TOTNST := TOTNST + NUMSTATES;
TODO_NEXT := TODO_NEXT + 1;
NUMAS := NUMAS + NACC;
if (VARIABLE_TRAILING_CONTEXT_RULES and (NACC > 0)) then
CHECK_TRAILING_CONTEXT(NSET, NUMSTATES, ACCSET, NACC);
end if;
end if;
STATE(SYM) := NEWDS;
if (TRACE) then
TEXT_IO.PUT(STANDARD_ERROR, ASCII.HT);
INT_IO.PUT(STANDARD_ERROR, SYM, 1);
TEXT_IO.PUT(STANDARD_ERROR, ASCII.HT);
INT_IO.PUT(STANDARD_ERROR, NEWDS, 1);
TEXT_IO.NEW_LINE(STANDARD_ERROR);
end if;
TARGPTR := TARGPTR + 1;
TARGFREQ(TARGPTR) := 1;
TARGSTATE(TARGPTR) := NEWDS;
NUMUNIQ := NUMUNIQ + 1;
else
-- sym's equivalence class has the same transitions
-- as duplist(sym)'s equivalence class
TARG := STATE(DUPLIST(SYM));
STATE(SYM) := TARG;
if (TRACE) then
TEXT_IO.PUT(STANDARD_ERROR, ASCII.HT);
INT_IO.PUT(STANDARD_ERROR, SYM, 1);
TEXT_IO.PUT(STANDARD_ERROR, ASCII.HT);
INT_IO.PUT(STANDARD_ERROR, TARG, 1);
TEXT_IO.NEW_LINE(STANDARD_ERROR);
end if;
-- update frequency count for destination state
I := 1;
while (TARGSTATE(I) /= TARG) loop
I := I + 1;
end loop;
TARGFREQ(I) := TARGFREQ(I) + 1;
NUMDUP := NUMDUP + 1;
end if;
TOTALTRANS := TOTALTRANS + 1;
DUPLIST(SYM) := NIL;
end if;
end loop;
NUMSNPAIRS := NUMSNPAIRS + TOTALTRANS;
if (CASEINS and not USEECS) then
I := CHARACTER'POS('A');
J := CHARACTER'POS('a');
while (I < CHARACTER'POS('Z')) loop
STATE(I) := STATE(J);
I := I + 1;
J := J + 1;
end loop;
end if;
if (DS > NUM_START_STATES) then
CHECK_FOR_BACKTRACKING(DS, STATE);
end if;
if (FULLTBL) then
-- supply array's 0-element
TEXT_IO.PUT(FULL_TABLE_TEMP_FILE, ",");
MISC.DATAFLUSH(FULL_TABLE_TEMP_FILE);
TEXT_IO.PUT(FULL_TABLE_TEMP_FILE, "( ");
if (DS = END_OF_BUFFER_STATE) then
MISC.MK2DATA(FULL_TABLE_TEMP_FILE, -END_OF_BUFFER_STATE);
else
MISC.MK2DATA(FULL_TABLE_TEMP_FILE, END_OF_BUFFER_STATE);
end if;
for CNT in 1 .. NUMECS loop
-- jams are marked by negative of state number
if ((STATE(CNT) /= 0)) then
MISC.MK2DATA(FULL_TABLE_TEMP_FILE, STATE(CNT));
else
MISC.MK2DATA(FULL_TABLE_TEMP_FILE, -DS);
end if;
end loop;
TEXT_IO.PUT(FULL_TABLE_TEMP_FILE, " )");
-- force extra blank line next dataflush()
DATALINE := NUMDATALINES;
else
if (DS = END_OF_BUFFER_STATE) then
-- special case this state to make sure it does what it's
-- supposed to, i.e., jam on end-of-buffer
TBLCMP.STACK1(DS, 0, 0, JAMSTATE_CONST);
else -- normal, compressed state
-- determine which destination state is the most common, and
-- how many transitions to it there are
COMFREQ := 0;
COMSTATE := 0;
for CNT in 1 .. TARGPTR loop
if (TARGFREQ(CNT) > COMFREQ) then
COMFREQ := TARGFREQ(CNT);
COMSTATE := TARGSTATE(CNT);
end if;
end loop;
TBLCMP.BLDTBL(STATE, DS, TOTALTRANS, COMSTATE, COMFREQ);
end if;
end if;
end loop;
if (FULLTBL) then
TEXT_IO.PUT("yy_nxt : constant array(0..");
INT_IO.PUT(NUM_NXT_STATES - 1, 1);
TEXT_IO.PUT_LINE(" , character'first..character'last) of short :=");
TEXT_IO.PUT_LINE(" (");
RESET(FULL_TABLE_TEMP_FILE, IN_FILE);
while (not END_OF_FILE(FULL_TABLE_TEMP_FILE)) loop
TSTRING.GET_LINE(FULL_TABLE_TEMP_FILE, BUF);
TSTRING.PUT_LINE(BUF);
end loop;
DELETE(FULL_TABLE_TEMP_FILE);
MISC.DATAEND;
else
TBLCMP.CMPTMPS; -- create compressed template entries
-- create tables for all the states with only one out-transition
while (ONESP > 0) loop
TBLCMP.MK1TBL(ONESTATE(ONESP), ONESYM(ONESP), ONENEXT(ONESP), ONEDEF(
ONESP));
ONESP := ONESP - 1;
end loop;
TBLCMP.MKDEFTBL;
end if;
end NTOD;
-- snstods - converts a set of ndfa states into a dfa state
--
-- on return, the dfa state number is in newds.
procedure SNSTODS(SNS : in INT_PTR;
NUMSTATES : in INTEGER;
ACCSET : in INT_PTR;
NACC, HASHVAL : in INTEGER;
NEWDS_ADDR : out INTEGER;
RESULT : out BOOLEAN) is
DIDSORT : BOOLEAN := FALSE;
J : INTEGER;
NEWDS : INTEGER;
OLDSNS : INT_PTR;
begin
for I in 1 .. LASTDFA loop
if (HASHVAL = DHASH(I)) then
if (NUMSTATES = DFASIZ(I)) then
OLDSNS := DSS(I);
if (not DIDSORT) then
-- we sort the states in sns so we can compare it to
-- oldsns quickly. we use bubble because there probably
-- aren't very many states
MISC.BUBBLE(SNS, NUMSTATES);
DIDSORT := TRUE;
end if;
J := 1;
while (J <= NUMSTATES) loop
if (SNS(J) /= OLDSNS(J)) then
exit;
end if;
J := J + 1;
end loop;
if (J > NUMSTATES) then
DFAEQL := DFAEQL + 1;
NEWDS_ADDR := I;
RESULT := FALSE;
return;
end if;
HSHCOL := HSHCOL + 1;
else
HSHSAVE := HSHSAVE + 1;
end if;
end if;
end loop;
-- make a new dfa
LASTDFA := LASTDFA + 1;
if (LASTDFA >= CURRENT_MAX_DFAS) then
INCREASE_MAX_DFAS;
end if;
NEWDS := LASTDFA;
DSS(NEWDS) := new UNBOUNDED_INT_ARRAY(0 .. NUMSTATES + 1);
-- if we haven't already sorted the states in sns, we do so now, so that
-- future comparisons with it can be made quickly
if (not DIDSORT) then
MISC.BUBBLE(SNS, NUMSTATES);
end if;
for I in 1 .. NUMSTATES loop
DSS(NEWDS)(I) := SNS(I);
end loop;
DFASIZ(NEWDS) := NUMSTATES;
DHASH(NEWDS) := HASHVAL;
if (NACC = 0) then
DFAACC(NEWDS).DFAACC_STATE := 0;
ACCSIZ(NEWDS) := 0;
else
-- find lowest numbered rule so the disambiguating rule will work
J := NUM_RULES + 1;
for I in 1 .. NACC loop
if (ACCSET(I) < J) then
J := ACCSET(I);
end if;
end loop;
DFAACC(NEWDS).DFAACC_STATE := J;
end if;
NEWDS_ADDR := NEWDS;
RESULT := TRUE;
return;
exception
when STORAGE_ERROR =>
MISC.AFLEXFATAL("dynamic memory failure in snstods()");
end SNSTODS;
-- symfollowset - follow the symbol transitions one step
function SYMFOLLOWSET(DS : in INT_PTR;
DSIZE, TRANSSYM : in INTEGER;
NSET : in INT_PTR) return INTEGER is
NS, TSP, SYM, LENCCL, CH, NUMSTATES, CCLLIST : INTEGER;
begin
NUMSTATES := 0;
for I in 1 .. DSIZE loop
-- for each nfa state ns in the state set of ds
NS := DS(I);
SYM := TRANSCHAR(NS);
TSP := TRANS1(NS);
if (SYM < 0) then
-- it's a character class
SYM := -SYM;
CCLLIST := CCLMAP(SYM);
LENCCL := CCLLEN(SYM);
if (CCLNG(SYM) /= 0) then
for J in 0 .. LENCCL - 1 loop
-- loop through negated character class
CH := CHARACTER'POS(CCLTBL(CCLLIST + J));
if (CH > TRANSSYM) then
exit; -- transsym isn't in negated ccl
else
if (CH = TRANSSYM) then
goto BOTTOM; -- next 2
end if;
end if;
end loop;
-- didn't find transsym in ccl
NUMSTATES := NUMSTATES + 1;
NSET(NUMSTATES) := TSP;
else
for J in 0 .. LENCCL - 1 loop
CH := CHARACTER'POS(CCLTBL(CCLLIST + J));
if (CH > TRANSSYM) then
exit;
else
if (CH = TRANSSYM) then
NUMSTATES := NUMSTATES + 1;
NSET(NUMSTATES) := TSP;
exit;
end if;
end if;
end loop;
end if;
else
if ((SYM >= CHARACTER'POS('A')) and (SYM <= CHARACTER'POS('Z')) and
CASEINS) then
MISC.AFLEXFATAL("consistency check failed in symfollowset");
else
if (SYM = SYM_EPSILON) then
null; -- do nothing
else
if (ECGROUP(SYM) = TRANSSYM) then
NUMSTATES := NUMSTATES + 1;
NSET(NUMSTATES) := TSP;
end if;
end if;
end if;
end if;
<<BOTTOM>> null;
end loop;
return NUMSTATES;
end SYMFOLLOWSET;
-- sympartition - partition characters with same out-transitions
procedure SYMPARTITION(DS : in INT_PTR;
NUMSTATES : in INTEGER;
SYMLIST : in out C_SIZE_BOOL_ARRAY;
DUPLIST : in out C_SIZE_ARRAY) is
TCH, J, NS, LENCCL, CCLP, ICH : INTEGER;
DUPFWD : C_SIZE_ARRAY;
-- partitioning is done by creating equivalence classes for those
-- characters which have out-transitions from the given state. Thus
-- we are really creating equivalence classes of equivalence classes.
begin
for I in 1 .. NUMECS loop
-- initialize equivalence class list
DUPLIST(I) := I - 1;
DUPFWD(I) := I + 1;
end loop;
DUPLIST(1) := NIL;
DUPFWD(NUMECS) := NIL;
DUPFWD(0) := 0;
for I in 1 .. NUMSTATES loop
NS := DS(I);
TCH := TRANSCHAR(NS);
if (TCH /= SYM_EPSILON) then
if ((TCH < -LASTCCL) or (TCH > CSIZE)) then
MISC.AFLEXFATAL("bad transition character detected in sympartition()")
;
end if;
if (TCH > 0) then
-- character transition
ECS.MKECHAR(ECGROUP(TCH), DUPFWD, DUPLIST);
SYMLIST(ECGROUP(TCH)) := TRUE;
else
-- character class
TCH := -TCH;
LENCCL := CCLLEN(TCH);
CCLP := CCLMAP(TCH);
ECS.MKECCL(CCLTBL(CCLP .. CCLP + LENCCL), LENCCL, DUPFWD, DUPLIST,
NUMECS);
if (CCLNG(TCH) /= 0) then
J := 0;
for K in 0 .. LENCCL - 1 loop
ICH := CHARACTER'POS(CCLTBL(CCLP + K));
J := J + 1;
while (J < ICH) loop
SYMLIST(J) := TRUE;
J := J + 1;
end loop;
end loop;
J := J + 1;
while (J <= NUMECS) loop
SYMLIST(J) := TRUE;
J := J + 1;
end loop;
else
for K in 0 .. LENCCL - 1 loop
ICH := CHARACTER'POS(CCLTBL(CCLP + K));
SYMLIST(ICH) := TRUE;
end loop;
end if;
end if;
end if;
end loop;
end SYMPARTITION;
end DFA;